notes\Use-Cases\massive-traffic\02-backpressure-rate-limiting.md

02 Backpressure Rate Limiting

Rapid overview

Backpressure & Rate Limiting: Protecting Your System Under Load
Why You MUST Have Backpressure
1. ASP.NET Core Built-in Rate Limiting (.NET 7+)
Basic Setup
Apply to Specific Endpoints
2. Per-User/Per-IP Rate Limiting
3. Concurrency Limiter (Semaphore Pattern)
Manual Semaphore Implementation
Middleware for Global Concurrency Limit
4. Bounded Queues for Background Work
5. Distributed Rate Limiting with Redis
Summary: Backpressure Checklist

Backpressure & Rate Limiting: Protecting Your System Under Load

Why You MUST Have Backpressure

The Problem: When traffic spikes, accepting every request leads to:

Queue explosion (memory exhaustion)
Database connection pool exhaustion
Cascading failures to downstream services
Increased latency for ALL users (death spiral)

The Solution: Reject requests early when you're at capacity. Better to serve 10,000 requests fast with 1,000 rejections than have all 11,000 requests time out.

Key Principle: Fail fast, fail explicitly, preserve capacity for requests you can handle.

---

1. ASP.NET Core Built-in Rate Limiting (.NET 7+)

The framework includes rate limiting middleware. Use it.

Basic Setup

// Program.cs
using System.Threading.RateLimiting;
using Microsoft.AspNetCore.RateLimiting;

var builder = WebApplication.CreateBuilder(args);

// Registers four named rate limiting policies plus a shared rejection handler.
// A named policy only takes effect on endpoints that opt in to it
// (e.g. with [EnableRateLimiting("name")]).
builder.Services.AddRateLimiter(options =>
{
    // Fixed window: X requests per time window
    options.AddFixedWindowLimiter("fixed", opt =>
    {
        opt.PermitLimit = 100;
        opt.Window = TimeSpan.FromMinutes(1);
        opt.QueueProcessingOrder = QueueProcessingOrder.OldestFirst;
        opt.QueueLimit = 10; // How many can queue waiting for a slot
    });

    // Sliding window: Smoother than fixed (prevents reset spike)
    options.AddSlidingWindowLimiter("sliding", opt =>
    {
        opt.PermitLimit = 100;
        opt.Window = TimeSpan.FromMinutes(1);
        opt.SegmentsPerWindow = 6; // 6 segments of 10 seconds each
        opt.QueueProcessingOrder = QueueProcessingOrder.OldestFirst;
        opt.QueueLimit = 0; // Reject immediately when full
    });

    // Token bucket: Allows bursts
    options.AddTokenBucketLimiter("token", opt =>
    {
        opt.TokenLimit = 100;
        opt.ReplenishmentPeriod = TimeSpan.FromMinutes(1);
        opt.TokensPerPeriod = 100;
        opt.QueueProcessingOrder = QueueProcessingOrder.OldestFirst;
        opt.QueueLimit = 0;
    });

    // Concurrency limiter: Max simultaneous requests
    options.AddConcurrencyLimiter("concurrent", opt =>
    {
        opt.PermitLimit = 50; // Only 50 requests executing at once
        opt.QueueProcessingOrder = QueueProcessingOrder.OldestFirst;
        opt.QueueLimit = 25; // 25 more can wait
    });

    // Default rejection response
    options.OnRejected = async (context, ct) =>
    {
        context.HttpContext.Response.StatusCode = StatusCodes.Status429TooManyRequests;

        // RetryAfter metadata is optional: TryGetMetadata returns false when the
        // limiter did not attach it, so keep the value nullable for the JSON body.
        double? retryAfterSeconds = null;

        if (context.Lease.TryGetMetadata(MetadataName.RetryAfter, out var retryAfter))
        {
            // The Retry-After header carries whole seconds; round up so clients
            // never retry early, and format culture-independently.
            retryAfterSeconds = Math.Ceiling(retryAfter.TotalSeconds);
            context.HttpContext.Response.Headers.RetryAfter =
                ((long)retryAfterSeconds.Value).ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        await context.HttpContext.Response.WriteAsJsonAsync(new
        {
            error = "Too many requests. Please retry later.",
            retryAfter = retryAfterSeconds
        }, cancellationToken: ct);
    };
});

var app = builder.Build();

// Enable the rate limiting middleware. The named policies above are enforced
// only on endpoints that opt in; nothing here limits every request globally.
app.UseRateLimiter();

app.MapControllers();
app.Run();

Apply to Specific Endpoints

/// <summary>
/// Order endpoints; each action opts in to one of the named rate limiting policies.
/// </summary>
[Route("api/[controller]")]
[ApiController]
public class OrdersController : ControllerBase
{
    /// <summary>Creates an order. Guarded by the "sliding" policy (strict limit).</summary>
    /// <param name="request">The order to create.</param>
    /// <param name="ct">Token signalled when the request is aborted.</param>
    [HttpPost, EnableRateLimiting("sliding")]
    public async Task<IActionResult> CreateOrderAsync(CreateOrderRequest request, CancellationToken ct)
    {
        // Process order
        return Ok();
    }

    /// <summary>Reads a single order. Guarded by the more lenient "token" policy.</summary>
    /// <param name="id">Identifier taken from the route.</param>
    /// <param name="ct">Token signalled when the request is aborted.</param>
    [HttpGet("{id}"), EnableRateLimiting("token")]
    public async Task<IActionResult> GetOrderAsync(
        int id,
        CancellationToken ct)
    {
        // Fetch order
        return Ok();
    }

    /// <summary>Builds an expensive report. Guarded by the "concurrent" policy.</summary>
    /// <param name="id">Identifier taken from the route.</param>
    /// <param name="ct">Token signalled when the request is aborted.</param>
    [HttpGet("reports/{id}"), EnableRateLimiting("concurrent")]
    public async Task<IActionResult> GetReportAsync(
        int id,
        CancellationToken ct)
    {
        // Generate report (expensive)
        return Ok();
    }
}

---

2. Per-User/Per-IP Rate Limiting

Prevent a single user from monopolizing resources.

// Custom partition key resolver
builder.Services.AddRateLimiter(options =>
{
    // NOTE: despite its name, this named limiter is a single limiter shared by every
    // caller that uses it. Only "per-user-policy" below partitions by caller.
    options.AddSlidingWindowLimiter("per-user", opt =>
    {
        opt.PermitLimit = 10;
        opt.Window = TimeSpan.FromSeconds(10);
        opt.SegmentsPerWindow = 2;
        opt.QueueLimit = 0;
    });

    options.AddPolicy("per-user-policy", context =>
    {
        // Resolve the caller identity: authenticated user, then API key, then IP.
        // Headers[...].ToString() yields "" (never null) when the header is absent,
        // so each candidate is tested with IsNullOrEmpty rather than chained with ??;
        // otherwise every keyless anonymous caller would share the "" partition.
        var userId = context.User.FindFirst(ClaimTypes.NameIdentifier)?.Value;
        var apiKey = context.Request.Headers["X-API-Key"].ToString();
        var remoteIp = context.Connection.RemoteIpAddress?.ToString();

        // Prefixes keep the three key spaces from colliding with one another.
        // NOTE(review): the API key is client-controlled; validate it before trusting
        // it as a partition key, or callers can dodge the limit by rotating values.
        string partitionKey;
        if (!string.IsNullOrEmpty(userId))
        {
            partitionKey = $"user:{userId}";
        }
        else if (!string.IsNullOrEmpty(apiKey))
        {
            partitionKey = $"key:{apiKey}";
        }
        else if (!string.IsNullOrEmpty(remoteIp))
        {
            partitionKey = $"ip:{remoteIp}";
        }
        else
        {
            partitionKey = "anonymous";
        }

        // One independent sliding window (100 requests/minute) per partition key.
        return RateLimitPartition.GetSlidingWindowLimiter(partitionKey, _ => new SlidingWindowRateLimiterOptions
        {
            PermitLimit = 100,
            Window = TimeSpan.FromMinutes(1),
            SegmentsPerWindow = 6,
            QueueLimit = 0
        });
    });
});

// Apply to controller
/// <summary>
/// Controller whose actions are all governed by the "per-user-policy" rate limit.
/// </summary>
[ApiController, Route("api/[controller]")]
[EnableRateLimiting("per-user-policy")]
public class UserController : ControllerBase
{
    // Each user gets their own 100 req/min limit
}

Why this matters:

One abusive user doesn't affect others
Prevents scraping/abuse
Fair resource distribution

---

3. Concurrency Limiter (Semaphore Pattern)

When rate limiting by time isn't enough (e.g., expensive operations), limit how many run simultaneously.

Manual Semaphore Implementation

/// <summary>
/// Contract for a service that produces reports whose generation is expensive.
/// </summary>
public interface IExpensiveService
{
    /// <summary>Generates the report for one user.</summary>
    /// <param name="userId">User the report is generated for.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A task that yields the generated report data.</returns>
    Task<ReportData> GenerateReportAsync(
        int userId,
        CancellationToken ct);
}

/// <summary>
/// Generates reports while capping how many generations run at the same time.
/// Callers that cannot obtain a slot within the wait timeout are rejected with
/// <see cref="TooManyRequestsException"/> instead of piling up.
/// </summary>
public class ExpensiveReportService : IExpensiveService
{
    // Only 20 reports generating at once
    // Tune based on: CPU cores, memory, DB connection pool size
    private const int MaxConcurrentReports = 20;

    // How long a caller may wait for a free slot before being rejected.
    private static readonly TimeSpan SlotWaitTimeout = TimeSpan.FromSeconds(2);

    // Static so the cap holds for the whole process. An instance field would give
    // every service instance its own semaphore, and with a scoped or transient DI
    // registration the limit would effectively never be reached.
    private static readonly SemaphoreSlim s_concurrencyLimit = new SemaphoreSlim(
        initialCount: MaxConcurrentReports,
        maxCount: MaxConcurrentReports
    );

    private readonly IDbConnection _db;
    private readonly ILogger<ExpensiveReportService> _logger;

    /// <summary>Creates the service.</summary>
    /// <param name="db">Connection used to run the report query.</param>
    /// <param name="logger">Logger for admission and completion events.</param>
    public ExpensiveReportService(
        IDbConnection db,
        ILogger<ExpensiveReportService> logger)
    {
        _db = db;
        _logger = logger;
    }

    /// <summary>Generates the report for a user, subject to the concurrency cap.</summary>
    /// <param name="userId">User the report is generated for.</param>
    /// <param name="ct">Token used to cancel waiting, querying and processing.</param>
    /// <returns>The generated report data.</returns>
    /// <exception cref="TooManyRequestsException">
    /// No slot became available within the wait timeout.
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<ReportData> GenerateReportAsync(
        int userId,
        CancellationToken ct)
    {
        // Try to acquire a slot, wait max 2 seconds
        var acquired = await s_concurrencyLimit.WaitAsync(SlotWaitTimeout, ct);

        if (!acquired)
        {
            _logger.LogWarning(
                "Report generation rejected for user {UserId} - concurrency limit reached",
                userId
            );
            throw new TooManyRequestsException(
                "System is under heavy load. Please retry in a moment."
            );
        }

        try
        {
            // CurrentCount is the number of free slots, so in-flight = max - free.
            // The value is a snapshot and may be stale by the time it is logged.
            _logger.LogInformation(
                "Starting report generation for user {UserId}. Current concurrency: {Current}/{Max}",
                userId,
                MaxConcurrentReports - s_concurrencyLimit.CurrentCount,
                MaxConcurrentReports
            );

            // Expensive work here. CommandDefinition carries the token so the
            // query itself is cancelled when the caller goes away.
            var data = await _db.QueryAsync<ReportData>(
                new CommandDefinition(
                    "EXEC GenerateUserReport @UserId",
                    new { UserId = userId },
                    cancellationToken: ct
                )
            );

            // Simulate heavy processing
            await ProcessDataAsync(data, ct);

            return new ReportData { /* ... */ };
        }
        finally
        {
            // Always give the slot back, including on failure or cancellation.
            s_concurrencyLimit.Release();

            _logger.LogInformation(
                "Completed report for user {UserId}. Current concurrency: {Current}/{Max}",
                userId,
                MaxConcurrentReports - s_concurrencyLimit.CurrentCount,
                MaxConcurrentReports
            );
        }
    }

    /// <summary>Placeholder for per-row processing; checks for cancellation on every item.</summary>
    /// <param name="data">Rows returned by the report query.</param>
    /// <param name="ct">Token used to stop processing early.</param>
    private async Task ProcessDataAsync(
        IEnumerable<ReportData> data,
        CancellationToken ct)
    {
        foreach (var item in data)
        {
            ct.ThrowIfCancellationRequested();
            // Heavy CPU work
            await Task.Delay(50, ct);
        }
    }
}

/// <summary>
/// Thrown when work is rejected because the system is at capacity.
/// Intended to be translated into an HTTP 429/503 response by the caller.
/// </summary>
public class TooManyRequestsException : Exception
{
    /// <summary>Creates the exception with the default message.</summary>
    public TooManyRequestsException() { }

    /// <summary>Creates the exception with a caller-supplied message.</summary>
    /// <param name="message">Description of why the request was rejected.</param>
    public TooManyRequestsException(string message) : base(message) { }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    /// <param name="message">Description of why the request was rejected.</param>
    /// <param name="innerException">The exception that triggered the rejection.</param>
    public TooManyRequestsException(string message, Exception innerException)
        : base(message, innerException) { }
}

Middleware for Global Concurrency Limit

/// <summary>
/// Middleware that caps the number of requests executing at once. A request that
/// cannot obtain a slot within a short wait is answered with 503 and Retry-After.
/// </summary>
public class ConcurrencyLimitMiddleware
{
    // Value advertised to rejected clients, in seconds.
    private const int RetryAfterSeconds = 5;

    // Short grace period so brief spikes are absorbed instead of rejected.
    private static readonly TimeSpan SlotWaitTimeout = TimeSpan.FromMilliseconds(100);

    private readonly RequestDelegate _next;
    private readonly SemaphoreSlim _semaphore;
    private readonly ILogger<ConcurrencyLimitMiddleware> _logger;

    /// <summary>Creates the middleware.</summary>
    /// <param name="next">Next delegate in the pipeline.</param>
    /// <param name="logger">Logger for rejection events.</param>
    /// <param name="maxConcurrency">Maximum number of requests allowed past this middleware at once.</param>
    public ConcurrencyLimitMiddleware(
        RequestDelegate next,
        ILogger<ConcurrencyLimitMiddleware> logger,
        int maxConcurrency = 1000)
    {
        _next = next;
        _logger = logger;
        _semaphore = new SemaphoreSlim(maxConcurrency, maxConcurrency);
    }

    /// <summary>Admits the request if a slot is free, otherwise rejects it with 503.</summary>
    /// <param name="context">The current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        // Pass RequestAborted so a client that disconnects while waiting does not
        // go on to take a slot it can no longer use.
        var acquired = await _semaphore.WaitAsync(SlotWaitTimeout, context.RequestAborted);

        if (!acquired)
        {
            _logger.LogWarning(
                "Request rejected - global concurrency limit reached. Path: {Path}",
                context.Request.Path
            );

            context.Response.StatusCode = StatusCodes.Status503ServiceUnavailable;
            context.Response.Headers.RetryAfter =
                RetryAfterSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture);

            await context.Response.WriteAsJsonAsync(new
            {
                error = "Service temporarily overloaded. Please retry.",
                retryAfter = RetryAfterSeconds
            }, context.RequestAborted);

            return;
        }

        try
        {
            await _next(context);
        }
        finally
        {
            // Release even when the downstream pipeline throws.
            _semaphore.Release();
        }
    }
}

// Register in Program.cs
// UseMiddleware<T> accepts extra constructor arguments positionally (params object[]),
// so a named argument such as "maxConcurrency:" does not compile. The int below is
// matched to the middleware constructor's maxConcurrency parameter.
app.UseMiddleware<ConcurrencyLimitMiddleware>(2000);

When to use:

CPU-intensive endpoints (reports, image processing)
Memory-intensive operations
Expensive database queries
Calls to rate-limited external APIs

---

4. Bounded Queues for Background Work

Don't let background job queues grow unbounded.

/// <summary>
/// Queue of background work items shared between producers and a consumer.
/// </summary>
public interface IBackgroundJobQueue
{
    /// <summary>Offers a work item to the queue.</summary>
    /// <param name="workItem">Delegate to run later; receives the consumer's cancellation token.</param>
    /// <param name="ct">Token used to cancel the enqueue attempt.</param>
    /// <returns><c>true</c> when the item was queued; <c>false</c> when it was rejected.</returns>
    ValueTask<bool> QueueAsync(
        Func<CancellationToken, Task> workItem,
        CancellationToken ct);

    /// <summary>Takes the next work item from the queue.</summary>
    /// <param name="ct">Token used to cancel the dequeue attempt.</param>
    /// <returns>The next work item, or <c>null</c> if none was produced.</returns>
    ValueTask<Func<CancellationToken, Task>?> DequeueAsync(
        CancellationToken ct);
}

/// <summary>
/// Fixed-capacity in-memory job queue backed by a bounded channel. When the queue
/// is full, <see cref="QueueAsync"/> fails fast and returns <c>false</c> so the
/// caller can reject the request instead of buffering without limit.
/// </summary>
public class BoundedBackgroundJobQueue : IBackgroundJobQueue
{
    private readonly Channel<Func<CancellationToken, Task>> _queue;
    private readonly ILogger<BoundedBackgroundJobQueue> _logger;

    /// <summary>Creates the queue.</summary>
    /// <param name="capacity">Maximum number of pending work items.</param>
    /// <param name="logger">Logger for rejection events.</param>
    public BoundedBackgroundJobQueue(
        int capacity,
        ILogger<BoundedBackgroundJobQueue> logger)
    {
        _logger = logger;

        // Wait mode is required for TryWrite to report a full queue by returning
        // false. With the Drop* modes TryWrite always returns true and silently
        // discards an item, so callers would be told a lost job had been queued.
        var options = new BoundedChannelOptions(capacity)
        {
            FullMode = BoundedChannelFullMode.Wait
        };

        _queue = Channel.CreateBounded<Func<CancellationToken, Task>>(options);
    }

    /// <summary>Attempts to enqueue a work item without waiting for free space.</summary>
    /// <param name="workItem">Delegate to run later on the consumer.</param>
    /// <param name="ct">Token checked before the enqueue attempt.</param>
    /// <returns>
    /// <c>true</c> when the item was queued; <c>false</c> when the queue is full
    /// (or the channel has been completed) and the item was rejected.
    /// </returns>
    public ValueTask<bool> QueueAsync(
        Func<CancellationToken, Task> workItem,
        CancellationToken ct)
    {
        if (ct.IsCancellationRequested)
        {
            return ValueTask.FromCanceled<bool>(ct);
        }

        // Non-blocking on purpose: backpressure means rejecting now, not waiting.
        if (_queue.Writer.TryWrite(workItem))
        {
            return new ValueTask<bool>(true);
        }

        _logger.LogWarning("Background queue is full - job rejected");
        return new ValueTask<bool>(false);
    }

    /// <summary>Waits for and returns the next work item.</summary>
    /// <param name="ct">Token used to cancel the wait.</param>
    /// <returns>The next queued work item.</returns>
    public async ValueTask<Func<CancellationToken, Task>?> DequeueAsync(
        CancellationToken ct)
    {
        var workItem = await _queue.Reader.ReadAsync(ct);
        return workItem;
    }
}

// Background service processing the queue
/// <summary>
/// Hosted service that pulls work items from <see cref="IBackgroundJobQueue"/> and
/// runs them one at a time until the host shuts down.
/// </summary>
public class BackgroundJobProcessor : BackgroundService
{
    private readonly IBackgroundJobQueue _queue;
    private readonly ILogger<BackgroundJobProcessor> _logger;

    /// <summary>Creates the processor.</summary>
    /// <param name="queue">Queue to consume work items from.</param>
    /// <param name="logger">Logger for lifecycle and failure events.</param>
    public BackgroundJobProcessor(
        IBackgroundJobQueue queue,
        ILogger<BackgroundJobProcessor> logger)
    {
        _queue = queue;
        _logger = logger;
    }

    /// <summary>Consumes and executes work items until <paramref name="stoppingToken"/> is signalled.</summary>
    /// <param name="stoppingToken">Signalled when the host is stopping.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("Background job processor started");

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                var workItem = await _queue.DequeueAsync(stoppingToken);

                if (workItem != null)
                {
                    await workItem(stoppingToken);
                }
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Expected on shutdown. The filter matters: a job that cancels for
                // its own reasons (e.g. an internal timeout) must not stop the
                // processor, so that case falls through to the handler below.
                break;
            }
            catch (Exception ex)
            {
                // One failing job must not take the whole processor down.
                _logger.LogError(ex, "Error processing background job");
            }
        }

        _logger.LogInformation("Background job processor stopped");
    }
}

// Usage in controller
/// <summary>
/// Accepts notification requests and hands the slow delivery work to the
/// background job queue, rejecting with 503 when the queue is full.
/// </summary>
[ApiController]
[Route("api/[controller]")]
public class NotificationController : ControllerBase
{
    private readonly IBackgroundJobQueue _jobQueue;

    /// <summary>Creates the controller.</summary>
    /// <param name="jobQueue">Queue that receives the deferred delivery work.</param>
    public NotificationController(IBackgroundJobQueue jobQueue)
    {
        // Without this assignment the field stays null and the action below throws.
        _jobQueue = jobQueue;
    }

    /// <summary>Queues a notification for asynchronous delivery.</summary>
    /// <param name="request">Recipient details for the notification.</param>
    /// <param name="ct">Token signalled when the request is aborted.</param>
    /// <returns>202 when queued; 503 when the queue rejected the job.</returns>
    [HttpPost("send")]
    public async Task<IActionResult> SendNotificationAsync(
        NotificationRequest request,
        CancellationToken ct)
    {
        // The work item runs later on the background processor, so it uses the
        // token it is handed there ("token"), not this request's token ("ct").
        var queued = await _jobQueue.QueueAsync(async token =>
        {
            // Expensive work: send email, SMS, push notification
            await SendEmailAsync(request.Email, token);
            await SendSmsAsync(request.Phone, token);
        }, ct);

        if (!queued)
        {
            return StatusCode(
                StatusCodes.Status503ServiceUnavailable,
                "System is overloaded. Please retry later."
            );
        }

        return Accepted(new { message = "Notification queued for processing" });
    }

    /// <summary>Placeholder for sending the email.</summary>
    private Task SendEmailAsync(string email, CancellationToken ct)
    {
        // Implementation
        return Task.CompletedTask;
    }

    /// <summary>Placeholder for sending the SMS.</summary>
    private Task SendSmsAsync(string phone, CancellationToken ct)
    {
        // Implementation
        return Task.CompletedTask;
    }
}

// Registration
// A single shared queue instance: producers (controllers) and the consumer
// (the hosted processor) must see the same queue.
builder.Services.AddSingleton<IBackgroundJobQueue>(serviceProvider =>
{
    var queueLogger = serviceProvider.GetRequiredService<ILogger<BoundedBackgroundJobQueue>>();

    // Tune based on memory
    return new BoundedBackgroundJobQueue(capacity: 10000, logger: queueLogger);
});
builder.Services.AddHostedService<BackgroundJobProcessor>();

Why bounded queues:

Prevents memory exhaustion
Fails fast when overloaded
Forces clients to retry (with backoff)
Preserves system stability

---

5. Distributed Rate Limiting with Redis

For multi-instance deployments, use Redis for shared rate limit state.

/// <summary>
/// Rate limiter whose state is shared across application instances.
/// </summary>
public interface IDistributedRateLimiter
{
    /// <summary>Decides whether one more request is allowed for the given key.</summary>
    /// <param name="key">Identity being limited (for example a user id or an IP address).</param>
    /// <param name="limit">Maximum number of requests permitted per window.</param>
    /// <param name="window">Length of the rate limiting window.</param>
    /// <param name="ct">Token used to cancel the check.</param>
    /// <returns><c>true</c> when the request may proceed; otherwise <c>false</c>.</returns>
    Task<bool> AllowRequestAsync(
        string key,
        int limit,
        TimeSpan window,
        CancellationToken ct);
}

/// <summary>
/// Sliding-window-log rate limiter that stores one sorted-set entry per request in
/// Redis. The steps run as separate commands, so the check-then-add sequence is not
/// atomic: concurrent callers can briefly exceed the limit. Fails open (allows the
/// request) when Redis is unavailable.
/// </summary>
public class RedisRateLimiter : IDistributedRateLimiter
{
    private readonly IDatabase _redis;
    private readonly ILogger<RedisRateLimiter> _logger;

    /// <summary>Creates the limiter.</summary>
    /// <param name="redis">Multiplexer used to obtain the Redis database.</param>
    /// <param name="logger">Logger for limit and failure events.</param>
    public RedisRateLimiter(
        IConnectionMultiplexer redis,
        ILogger<RedisRateLimiter> logger)
    {
        _redis = redis.GetDatabase();
        _logger = logger;
    }

    /// <summary>Records the request and reports whether it is within the limit.</summary>
    /// <param name="key">Identity being limited.</param>
    /// <param name="limit">Maximum number of requests permitted per window.</param>
    /// <param name="window">Length of the sliding window.</param>
    /// <param name="ct">Checked once before any Redis command is issued.</param>
    /// <returns><c>true</c> when allowed, or when Redis failed; otherwise <c>false</c>.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was already cancelled.</exception>
    public async Task<bool> AllowRequestAsync(
        string key,
        int limit,
        TimeSpan window,
        CancellationToken ct)
    {
        // Checked outside the try block so cancellation is never mistaken for a
        // Redis failure and turned into "allow".
        ct.ThrowIfCancellationRequested();

        var redisKey = $"ratelimit:{key}";

        try
        {
            // Sliding window log algorithm using Redis sorted set
            var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
            var windowStart = now - (long)window.TotalMilliseconds;

            // Remove old entries outside the window
            await _redis.SortedSetRemoveRangeByScoreAsync(
                redisKey,
                double.NegativeInfinity,
                windowStart
            );

            // Count current entries in window
            var currentCount = await _redis.SortedSetLengthAsync(redisKey);

            if (currentCount >= limit)
            {
                _logger.LogWarning(
                    "Rate limit exceeded for key {Key}. Current: {Current}, Limit: {Limit}",
                    key, currentCount, limit
                );
                return false;
            }

            // Add current request. The member must be unique per request: using the
            // bare timestamp lets two requests in the same millisecond overwrite
            // each other, which undercounts traffic exactly when it is heaviest.
            var member = $"{now}:{Guid.NewGuid():N}";
            await _redis.SortedSetAddAsync(redisKey, member, now);

            // Set expiry to clean up old keys
            await _redis.KeyExpireAsync(redisKey, window);

            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Redis rate limiter error for key {Key}. Allowing request.", key);
            // Fail open: allow request if Redis is down
            return true;
        }
    }
}

// Lua script for atomic sliding window (more efficient)
/// <summary>
/// Sliding-window-log rate limiter that performs trim, count, add and expire in one
/// Lua script, so the decision is atomic on the Redis server. Fails open (allows
/// the request) when Redis is unavailable.
/// </summary>
public class RedisRateLimiterOptimized : IDistributedRateLimiter
{
    private readonly IDatabase _redis;
    private readonly ILogger<RedisRateLimiterOptimized> _logger;

    // ARGV[1] = now (ms), ARGV[2] = window (ms), ARGV[3] = limit, ARGV[4] = unique member.
    // - The member is unique per request so same-millisecond requests do not
    //   overwrite each other in the sorted set.
    // - PEXPIRE takes the window in milliseconds directly; EXPIRE with window / 1000
    //   fails for windows that are not a whole number of seconds.
    private const string LuaScript = @"
        local key = KEYS[1]
        local now = tonumber(ARGV[1])
        local window = tonumber(ARGV[2])
        local limit = tonumber(ARGV[3])
        local member = ARGV[4]

        local clearBefore = now - window
        redis.call('ZREMRANGEBYSCORE', key, 0, clearBefore)

        local amount = redis.call('ZCARD', key)
        if amount < limit then
            redis.call('ZADD', key, ARGV[1], member)
            redis.call('PEXPIRE', key, ARGV[2])
            return 1
        end
        return 0
    ";

    /// <summary>Creates the limiter.</summary>
    /// <param name="redis">Multiplexer used to obtain the Redis database.</param>
    /// <param name="logger">Logger for failure events.</param>
    public RedisRateLimiterOptimized(
        IConnectionMultiplexer redis,
        ILogger<RedisRateLimiterOptimized> logger)
    {
        // Without these assignments both fields stay null and every call throws.
        _redis = redis.GetDatabase();
        _logger = logger;
    }

    /// <summary>Atomically records the request and reports whether it is within the limit.</summary>
    /// <param name="key">Identity being limited.</param>
    /// <param name="limit">Maximum number of requests permitted per window.</param>
    /// <param name="window">Length of the sliding window.</param>
    /// <param name="ct">Checked once before the script is sent.</param>
    /// <returns><c>true</c> when allowed, or when Redis failed; otherwise <c>false</c>.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was already cancelled.</exception>
    public async Task<bool> AllowRequestAsync(
        string key,
        int limit,
        TimeSpan window,
        CancellationToken ct)
    {
        // Checked outside the try block so cancellation is not treated as a Redis failure.
        ct.ThrowIfCancellationRequested();

        var redisKey = $"ratelimit:{key}";
        var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        var member = $"{now}:{Guid.NewGuid():N}";

        try
        {
            var result = await _redis.ScriptEvaluateAsync(
                LuaScript,
                new RedisKey[] { redisKey },
                new RedisValue[]
                {
                    now,
                    (long)window.TotalMilliseconds,
                    limit,
                    member
                }
            );

            return (int)result == 1;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Redis rate limiter error. Allowing request.");
            return true; // Fail open
        }
    }
}

// Usage in middleware
/// <summary>
/// Middleware that applies a per-caller limit through <see cref="IDistributedRateLimiter"/>
/// and answers 429 when the caller is over the limit.
/// </summary>
public class DistributedRateLimitMiddleware
{
    // Requests permitted per caller per window.
    private const int RequestLimit = 100;

    private static readonly TimeSpan Window = TimeSpan.FromMinutes(1);

    private readonly RequestDelegate _next;
    private readonly IDistributedRateLimiter _rateLimiter;

    /// <summary>Creates the middleware.</summary>
    /// <param name="next">Next delegate in the pipeline.</param>
    /// <param name="rateLimiter">Shared limiter used to make the allow/deny decision.</param>
    public DistributedRateLimitMiddleware(
        RequestDelegate next,
        IDistributedRateLimiter rateLimiter)
    {
        // Without these assignments both fields stay null and InvokeAsync throws.
        _next = next;
        _rateLimiter = rateLimiter;
    }

    /// <summary>Checks the caller's limit and either forwards or rejects the request.</summary>
    /// <param name="context">The current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        // Identify the caller: authenticated user id, else remote IP, else a shared bucket.
        var userId = context.User.FindFirst(ClaimTypes.NameIdentifier)?.Value
                     ?? context.Connection.RemoteIpAddress?.ToString()
                     ?? "anonymous";

        var allowed = await _rateLimiter.AllowRequestAsync(
            key: userId,
            limit: RequestLimit,
            window: Window,
            ct: context.RequestAborted
        );

        if (!allowed)
        {
            context.Response.StatusCode = StatusCodes.Status429TooManyRequests;

            // The full window is an upper bound on how long the caller must wait.
            context.Response.Headers.RetryAfter =
                ((int)Window.TotalSeconds).ToString(System.Globalization.CultureInfo.InvariantCulture);

            await context.Response.WriteAsJsonAsync(new
            {
                error = "Rate limit exceeded"
            }, context.RequestAborted);
            return;
        }

        await _next(context);
    }
}

---

Summary: Backpressure Checklist

- ✅ Built-in rate limiting configured for public endpoints
- ✅ Per-user/per-IP limits to prevent monopolization
- ✅ Concurrency limits on expensive operations (semaphore pattern)
- ✅ Bounded queues for background work
- ✅ Distributed rate limiting (Redis) for multi-instance deployments
- ✅ Fail fast with proper 429/503 responses and Retry-After headers
- ✅ Monitoring of rejection rates, queue depth, semaphore contention

Key Insight: Backpressure is not about blocking users — it's about preserving system health so you can serve the requests you accept with good latency. A 429 response in 10ms is better than a timeout after 30 seconds.

Next: Caching Strategies - The fastest request is one you don't have to process.